It looks like there is a slightly higher rate of negative ratings for the high-debt group, along with an even smaller increase in positive ratings.
# Prepare Likert-style data: recode the numeric quality ratings and the
# debt-version flags into human-readable labels for plotting.
likert.data <- d.both_completed %>%
  select(high_debt_version, quality_post_task) %>%
  mutate(
    quality_post_task = revalue(quality_post_task, c(
      "-3" = "Very Bad",
      "-2" = "Bad",
      "-1" = "Somewhat Bad",
      "0" = "Neutral",
      "1" = "Somewhat Good",
      "2" = "Good",
      "3" = "Very Good"
    )),
    high_debt_version = revalue(high_debt_version, c(
      "true" = "High Debt",
      "false" = "Low Debt"
    ))
  )

# Bar chart of rating counts, one facet row per debt version.
ggplot(likert.data, aes(x = quality_post_task)) +
  geom_bar(fill = "Light Blue") +
  facet_grid(rows = vars(high_debt_version)) +
  scale_y_continuous(limits = NULL, breaks = c(2, 4, 6, 8), labels = c("2", "4", "6", "8")) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
As the data is collected from a likert scale we will use a cumulative family, indicating that each level on the scale is an incremental step. This model is also able to fit the data well.
We include high_debt_version as a predictor in our model as this variable represents the very effect we want to measure. We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.
We iterate over the model until we have sane priors.
# Base model: ordinal (cumulative-logit) regression of self-rated quality
# with a varying intercept per participant session. Extensions add
# predictors on top of this base via extendable_model().
own_quality.with <- extendable_model(
base_name = "own_quality",
base_formula = "quality_post_task ~ 1 + (1 | session)",
base_priors = c(
prior(normal(0, 2.5), class = "Intercept"),
# The base model has no population-level slopes, so the beta prior is
# supplied per-extension instead (kept here, commented out, as a record).
#prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = d.both_completed,
base_control = list(adapt_delta = 0.95)
)
# Inspect the priors and the prior-only predictive distribution for sanity.
prior_summary(own_quality.with(only_priors= TRUE))
prior_summary(own_quality.with(sample_prior = "only"))
pp_check(own_quality.with(sample_prior = "only"), nsamples = 200, type = "bars")
We choose a beta parameter prior that allows the beta parameter to account for 100% of the effect but that is skeptical of such strong effects from the beta parameter.
# Prior predictive simulation: how much can the beta coefficient move the
# outcome probability relative to the intercept alone?
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 0, 2.5)
sim.beta <- rnorm(sim.size, 0, 1)
# Percentage change in probability when the beta effect is added.
sim.beta.diff <- (plogis(sim.intercept + sim.beta) / plogis(sim.intercept) * 100) - 100

# Density of the simulated relative effect, truncated to +/-150%.
ggplot(data.frame(x = sim.beta.diff), aes(x)) +
  geom_density() +
  xlim(-150, 150) +
  labs(
    title = "Beta parameter prior influence",
    x = "Estimate with beta as % of estimate without beta",
    y = "Density"
  )
We check the posterior distribution and can see that the model seems to have been able to fit the data well. Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
# Posterior predictive check and fit summary for the base model.
pp_check(own_quality.with(), nsamples = 200, type = "bars")
summary(own_quality.with())
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.52 0.40 0.02 1.49 1.00 1391 2309
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept[1] -3.23 0.73 -4.84 -1.98 1.00 2356 2390
## Intercept[2] -0.84 0.36 -1.59 -0.16 1.00 3943 3089
## Intercept[3] 1.66 0.43 0.88 2.55 1.00 4207 3405
## Intercept[4] 2.38 0.52 1.45 3.50 1.00 4761 3693
## Intercept[5] 3.33 0.71 2.12 4.86 1.00 5351 3449
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for all model parameters (sampling diagnostics).
plot(own_quality.with(), ask = FALSE)
# Default prior for the monotonic education_level predictor (Dirichlet over
# the simplex of step sizes between adjacent levels).
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
# Default prior for population-level (beta) coefficients added by the
# model extensions below.
beta_prior <- prior(normal(0, 1), class = "b")
We use loo to check some possible extensions to the model.
# Compare extensions with variable-naming quality predictors against the
# base model using PSIS-LOO cross-validation.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("var_names_new_good.ratio", beta_prior),
own_quality.with("var_names_copied_good.ratio", beta_prior),
own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior)
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff
## own_quality.with() 0.0
## own_quality.with("var_names_copied_good.ratio", beta_prior) -0.1
## own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior) -0.5
## own_quality.with("var_names_new_good.ratio", beta_prior) -0.7
## se_diff
## own_quality.with() 0.0
## own_quality.with("var_names_copied_good.ratio", beta_prior) 0.6
## own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior) 0.6
## own_quality.with("var_names_new_good.ratio", beta_prior) 0.2
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("var_names_new_good.ratio", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.9 6.9
## p_loo 9.0 1.8
## looic 125.8 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 1357
## (0.5, 0.7] (ok) 7 15.9% 481
## (0.7, 1] (bad) 1 2.3% 102
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("var_names_copied_good.ratio", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.4 6.8
## p_loo 8.6 1.6
## looic 124.7 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 1280
## (0.5, 0.7] (ok) 7 15.9% 561
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with(c("var_names_copied_good.ratio", "var_names_new_good.ratio"), beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.7 6.8
## p_loo 9.3 1.7
## looic 125.4 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 1140
## (0.5, 0.7] (ok) 8 18.2% 514
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# Compare extensions with logic-reuse predictors against the base model.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("reused_logic_constructor", beta_prior),
own_quality.with("reused_logic_validation", beta_prior),
own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior)
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff
## own_quality.with("reused_logic_validation", beta_prior) 0.0
## own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior) -0.4
## own_quality.with("reused_logic_constructor", beta_prior) -1.5
## own_quality.with() -2.2
## se_diff
## own_quality.with("reused_logic_validation", beta_prior) 0.0
## own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior) 0.3
## own_quality.with("reused_logic_constructor", beta_prior) 1.1
## own_quality.with() 1.5
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("reused_logic_constructor", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.5 6.6
## p_loo 8.9 1.7
## looic 123.1 13.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 914
## (0.5, 0.7] (ok) 3 6.8% 4000
## (0.7, 1] (bad) 1 2.3% 232
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("reused_logic_validation", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.1 6.1
## p_loo 8.3 1.5
## looic 120.1 12.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 40 90.9% 1004
## (0.5, 0.7] (ok) 4 9.1% 469
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with(c("reused_logic_validation", "reused_logic_constructor"), beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -60.4 6.1
## p_loo 8.9 1.6
## looic 120.9 12.2
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1308
## (0.5, 0.7] (ok) 6 13.6% 517
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# Compare extensions with equals/hashcode implementation predictors
# against the base model.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("equals.exists", beta_prior),
own_quality.with("hashcode.exists", beta_prior),
own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff
## own_quality.with() 0.0
## own_quality.with(c("hashcode.state", "equals.exists"), beta_prior) -0.5
## own_quality.with("hashcode.exists", beta_prior) -0.6
## own_quality.with("equals.exists", beta_prior) -0.8
## se_diff
## own_quality.with() 0.0
## own_quality.with(c("hashcode.state", "equals.exists"), beta_prior) 1.1
## own_quality.with("hashcode.exists", beta_prior) 0.2
## own_quality.with("equals.exists", beta_prior) 0.2
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("equals.exists", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.1 6.9
## p_loo 9.5 1.8
## looic 126.1 13.8
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 924
## (0.5, 0.7] (ok) 6 13.6% 687
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("hashcode.exists", beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.9 6.9
## p_loo 9.3 1.8
## looic 125.7 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1162
## (0.5, 0.7] (ok) 5 11.4% 322
## (0.7, 1] (bad) 1 2.3% 841
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with(c("hashcode.state", "equals.exists"), beta_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.7 6.7
## p_loo 10.0 1.9
## looic 125.4 13.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1337
## (0.5, 0.7] (ok) 6 13.6% 427
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# Compare extensions with sonarqube issue count and documentation
# predictors against the base model.
# NOTE(review): unlike the earlier comparisons, these extensions do not
# pass beta_prior, so the added coefficients presumably fall back to the
# default prior -- confirm this is intended.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
# New model(s)
own_quality.with("sonarqube_issues.s"),
own_quality.with("documentation")
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff se_diff
## own_quality.with() 0.0 0.0
## own_quality.with("sonarqube_issues.s") -1.6 1.2
## own_quality.with("documentation") -2.5 1.3
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("sonarqube_issues.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.9 7.3
## p_loo 10.2 2.4
## looic 127.7 14.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 331
## (0.5, 0.7] (ok) 3 6.8% 597
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality.with("documentation")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.7 7.4
## p_loo 11.5 2.5
## looic 129.4 14.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 35 79.5% 723
## (0.5, 0.7] (ok) 8 18.2% 464
## (0.7, 1] (bad) 1 2.3% 177
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
We create a new base model based on what we learned from extending the previous model with different quality measurements. We take care to not include redundant quality indicators.
# Second base model: fixes the non-redundant quality indicators that
# performed best in the comparisons above as population-level predictors,
# keeping the per-session varying intercept. Note the formula is a single
# multi-line string.
own_quality1.with <- extendable_model(
base_name = "own_quality1",
base_formula = "quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session)",
base_priors = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = data.frame(d.both_completed),
base_control = list(adapt_delta = 0.95)
)
# Compare demographic/workplace extensions of the new base model against
# both base models.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
own_quality1.with(),
# New model(s)
own_quality1.with("work_domain"),
own_quality1.with("work_experience_programming.s"),
own_quality1.with("work_experience_java.s"),
own_quality1.with("education_field"),
# Education level is modelled as a monotonic predictor with its own prior.
own_quality1.with("mo(education_level)", edlvl_prior),
own_quality1.with("workplace_peer_review"),
own_quality1.with("workplace_td_tracking"),
own_quality1.with("workplace_pair_programming"),
own_quality1.with("workplace_coding_standards"),
own_quality1.with("scenario"),
own_quality1.with("group")
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff se_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0 0.0
## own_quality.with() -1.3 3.0
## own_quality1.with("education_field") -2.7 2.7
## own_quality1.with() -3.2 2.7
## own_quality1.with("workplace_coding_standards") -3.2 2.4
## own_quality1.with("group") -3.4 2.2
## own_quality1.with("workplace_peer_review") -3.4 2.4
## own_quality1.with("workplace_pair_programming") -3.7 2.6
## own_quality1.with("workplace_td_tracking") -3.9 2.2
## own_quality1.with("scenario") -4.0 2.6
## own_quality1.with("work_experience_java.s") -4.3 2.2
## own_quality1.with("work_domain") -4.3 2.6
## own_quality1.with("work_experience_programming.s") -4.4 1.9
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.1 6.7
## p_loo 13.3 2.5
## looic 128.3 13.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 35 79.5% 820
## (0.5, 0.7] (ok) 8 18.2% 218
## (0.7, 1] (bad) 1 2.3% 303
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -65.2 7.1
## p_loo 16.8 3.3
## looic 130.5 14.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 714
## (0.5, 0.7] (ok) 5 11.4% 256
## (0.7, 1] (bad) 1 2.3% 109
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -65.4 7.1
## p_loo 15.2 3.1
## looic 130.8 14.1
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 277
## (0.5, 0.7] (ok) 5 11.4% 344
## (0.7, 1] (bad) 2 4.5% 307
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -65.2 6.9
## p_loo 14.9 2.9
## looic 130.4 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 961
## (0.5, 0.7] (ok) 7 15.9% 284
## (0.7, 1] (bad) 1 2.3% 230
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.6 6.6
## p_loo 13.7 2.4
## looic 127.3 13.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 490
## (0.5, 0.7] (ok) 3 6.8% 347
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 7.2
## p_loo 12.8 2.6
## looic 121.9 14.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 314
## (0.5, 0.7] (ok) 2 4.5% 706
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.4 6.9
## p_loo 14.1 2.6
## looic 128.8 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 660
## (0.5, 0.7] (ok) 7 15.9% 200
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.8 6.9
## p_loo 14.5 2.8
## looic 129.7 13.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 842
## (0.5, 0.7] (ok) 6 13.6% 312
## (0.7, 1] (bad) 2 4.5% 174
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.7 6.8
## p_loo 14.3 2.6
## looic 129.4 13.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 343
## (0.5, 0.7] (ok) 5 11.4% 505
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.2 6.7
## p_loo 13.8 2.5
## looic 128.4 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 878
## (0.5, 0.7] (ok) 6 13.6% 385
## (0.7, 1] (bad) 1 2.3% 283
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.9 6.7
## p_loo 14.4 2.6
## looic 129.8 13.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 1003
## (0.5, 0.7] (ok) 7 15.9% 254
## (0.7, 1] (bad) 1 2.3% 2071
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.4 6.9
## p_loo 14.6 2.6
## looic 128.7 13.9
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 391
## (0.5, 0.7] (ok) 3 6.8% 431
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
# Combine the best-performing demographic predictors pairwise and compare
# against the previous candidates.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
own_quality1.with(),
own_quality1.with("mo(education_level)", edlvl_prior),
own_quality1.with("education_field"),
own_quality1.with("workplace_peer_review"),
# New model(s)
own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
own_quality1.with(c("education_field", "workplace_peer_review"))
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) -1.0
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) -1.2
## own_quality.with() -1.3
## own_quality1.with("education_field") -2.7
## own_quality1.with() -3.2
## own_quality1.with("workplace_peer_review") -3.4
## own_quality1.with(c("education_field", "workplace_peer_review")) -3.8
## se_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) 0.6
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) 0.2
## own_quality.with() 3.0
## own_quality1.with("education_field") 2.7
## own_quality1.with() 2.7
## own_quality1.with("workplace_peer_review") 2.4
## own_quality1.with(c("education_field", "workplace_peer_review")) 2.5
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.1 6.7
## p_loo 13.3 2.5
## looic 128.3 13.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 35 79.5% 820
## (0.5, 0.7] (ok) 8 18.2% 218
## (0.7, 1] (bad) 1 2.3% 303
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 7.2
## p_loo 12.8 2.6
## looic 121.9 14.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 314
## (0.5, 0.7] (ok) 2 4.5% 706
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.6 6.6
## p_loo 13.7 2.4
## looic 127.3 13.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 490
## (0.5, 0.7] (ok) 3 6.8% 347
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.4 6.9
## p_loo 14.1 2.6
## looic 128.8 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 660
## (0.5, 0.7] (ok) 7 15.9% 200
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.9 7.2
## p_loo 14.1 2.7
## looic 123.8 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1254
## (0.5, 0.7] (ok) 5 11.4% 382
## (0.7, 1] (bad) 1 2.3% 222
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.1 7.4
## p_loo 14.1 2.8
## looic 124.2 14.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 508
## (0.5, 0.7] (ok) 5 11.4% 212
## (0.7, 1] (bad) 1 2.3% 724
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("education_field", "workplace_peer_review"))`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.7 6.9
## p_loo 15.0 2.7
## looic 129.5 13.7
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 36 81.8% 832
## (0.5, 0.7] (ok) 6 13.6% 354
## (0.7, 1] (bad) 2 4.5% 237
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
# Final comparison: add the three-way combination of the best demographic
# predictors and compare against all previous candidates.
loo_result <- loo(
# Benchmark model(s)
own_quality.with(),
own_quality1.with(),
own_quality1.with("mo(education_level)", edlvl_prior),
own_quality1.with("education_field"),
own_quality1.with("workplace_peer_review"),
own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior),
own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior),
# New model(s)
own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior)
)
# elpd differences, best model first.
loo_result[2]
## $diffs
## elpd_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) -1.0
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) -1.2
## own_quality.with() -1.3
## own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior) -1.7
## own_quality1.with("education_field") -2.7
## own_quality1.with() -3.2
## own_quality1.with("workplace_peer_review") -3.4
## se_diff
## own_quality1.with("mo(education_level)", edlvl_prior) 0.0
## own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior) 0.6
## own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior) 0.2
## own_quality.with() 3.0
## own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior) 0.4
## own_quality1.with("education_field") 2.7
## own_quality1.with() 2.7
## own_quality1.with("workplace_peer_review") 2.4
# Full loo objects with Pareto k diagnostics for each model.
loo_result[1]
## $loos
## $loos$`own_quality.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.2 6.8
## p_loo 8.2 1.6
## looic 124.5 13.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1213
## (0.5, 0.7] (ok) 4 9.1% 437
## (0.7, 1] (bad) 1 2.3% 253
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.1 6.7
## p_loo 13.3 2.5
## looic 128.3 13.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 35 79.5% 820
## (0.5, 0.7] (ok) 8 18.2% 218
## (0.7, 1] (bad) 1 2.3% 303
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.0 7.2
## p_loo 12.8 2.6
## looic 121.9 14.5
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 42 95.5% 314
## (0.5, 0.7] (ok) 2 4.5% 706
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -63.6 6.6
## p_loo 13.7 2.4
## looic 127.3 13.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 41 93.2% 490
## (0.5, 0.7] (ok) 3 6.8% 347
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -64.4 6.9
## p_loo 14.1 2.6
## looic 128.8 13.7
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 37 84.1% 660
## (0.5, 0.7] (ok) 7 15.9% 200
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "education_field"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -61.9 7.2
## p_loo 14.1 2.7
## looic 123.8 14.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 1254
## (0.5, 0.7] (ok) 5 11.4% 382
## (0.7, 1] (bad) 1 2.3% 222
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "workplace_peer_review"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.1 7.4
## p_loo 14.1 2.8
## looic 124.2 14.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 38 86.4% 508
## (0.5, 0.7] (ok) 5 11.4% 212
## (0.7, 1] (bad) 1 2.3% 724
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`own_quality1.with(c("mo(education_level)", "education_field", "workplace_peer_review"), edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -62.6 7.3
## p_loo 14.8 2.8
## looic 125.3 14.6
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 39 88.6% 1211
## (0.5, 0.7] (ok) 5 11.4% 164
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
We pick some of our top performing models as candidates and inspect them closer.
The candidate models are named and listed in order of complexity.
We select the simplest model as a baseline.
# Baseline candidate: cumulative (ordinal logit) model of the participants'
# self-assessed quality rating on the measured submission properties, with
# a varying intercept per participant session.
own_quality0 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session)",
# Weakly informative priors, validated by the prior predictive checks above.
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
# Cache the fit on disk; refit only when formula/priors/data change.
file = "fits/own_quality0",
file_refit = "on_change",
# Raised adapt_delta to reduce divergent transitions in this small sample.
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality0)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.53 0.42 0.01 1.54 1.01 1325 1583
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -4.11 1.20 -6.50 -1.90 1.00 4341
## Intercept[2] -1.61 0.99 -3.54 0.28 1.00 4733
## Intercept[3] 1.10 1.02 -0.88 3.10 1.00 4237
## Intercept[4] 1.87 1.04 -0.17 3.92 1.00 4409
## Intercept[5] 2.84 1.15 0.58 5.11 1.00 4473
## var_names_copied_good.ratio 0.19 0.69 -1.15 1.54 1.00 4769
## var_names_new_good.ratio -0.20 0.75 -1.67 1.24 1.00 5185
## reused_logic_validationfalse -1.26 0.69 -2.60 0.08 1.00 4198
## equals.existsFALSE -0.11 0.58 -1.28 0.98 1.00 4454
## sonarqube_issues.s 0.14 0.32 -0.49 0.76 1.00 4133
## documentationIncorrect -0.09 0.71 -1.50 1.30 1.00 5126
## documentationNone -0.31 0.66 -1.60 0.97 1.00 4975
## Tail_ESS
## Intercept[1] 3079
## Intercept[2] 2832
## Intercept[3] 2735
## Intercept[4] 3308
## Intercept[5] 3123
## var_names_copied_good.ratio 3147
## var_names_new_good.ratio 3297
## reused_logic_validationfalse 3001
## equals.existsFALSE 3015
## sonarqube_issues.s 2578
## documentationIncorrect 2954
## documentationNone 2900
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session (participant) varying-intercept estimates.
ranef(own_quality0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.007544144 0.5378031 -1.1171007 1.2028535
## 6033d90a5af2c702367b3a96 0.026349309 0.5466381 -1.1124962 1.2344035
## 6034fc165af2c702367b3a98 0.199248293 0.6079325 -0.8149398 1.7834728
## 603500725af2c702367b3a99 0.199402894 0.5992824 -0.8462318 1.7257887
## 603f97625af2c702367b3a9d 0.181640433 0.5886396 -0.8672999 1.6859825
## 603fd5d95af2c702367b3a9e 0.215403275 0.6023723 -0.7959921 1.7537103
## 60409b7b5af2c702367b3a9f -0.279325591 0.6448898 -1.9697368 0.7493855
## 604b82b5a7718fbed181b336 -0.115821322 0.5749905 -1.5056955 0.9305400
## 6050c1bf856f36729d2e5218 -0.352016889 0.6834703 -2.1384807 0.5989402
## 6050e1e7856f36729d2e5219 0.276076359 0.6306797 -0.7115562 1.8906685
## 6055fdc6856f36729d2e521b -0.086409637 0.5756934 -1.4269732 1.0319775
## 60589862856f36729d2e521f 0.041518670 0.6130306 -1.2091130 1.4609350
## 605afa3a856f36729d2e5222 0.104762733 0.5701426 -1.0405518 1.5000523
## 605c8bc6856f36729d2e5223 0.111814931 0.5547550 -0.9721043 1.4165055
## 605f3f2d856f36729d2e5224 -0.280995424 0.6628468 -2.0003600 0.6802960
## 605f46c3856f36729d2e5225 -0.417435803 0.7395847 -2.3814232 0.5366846
## 60605337856f36729d2e5226 -0.045221329 0.5696502 -1.3583465 1.1915155
## 60609ae6856f36729d2e5228 -0.097855010 0.5657435 -1.4728868 0.9988984
## 6061ce91856f36729d2e522e 0.250592281 0.6216485 -0.6978318 1.8753488
## 6061f106856f36729d2e5231 0.040844497 0.5470813 -1.1148965 1.3339530
## 6068ea9f856f36729d2e523e 0.227096833 0.5933363 -0.7592833 1.7386085
## 6075ab05856f36729d2e5247 -0.409990258 0.7074377 -2.2452432 0.5339770
# MCMC trace/density diagnostics and posterior predictive check.
plot(own_quality0, ask = FALSE)
pp_check(own_quality0, nsamples = 200, type = "bars")
We select the best performing model with one variable.
# Candidate with one extra predictor: education level entered as a
# monotonic (ordered) term via mo().
own_quality1 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session) +
mo(education_level)",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd"),
# Dirichlet prior over the simplex of monotonic increments.
prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
# Cached fit; refit only when formula/priors/data change.
file = "fits/own_quality1",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality1)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level)
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.39 0.33 0.01 1.21 1.00 2323 2272
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -5.51 1.27 -8.13 -3.10 1.00 5249
## Intercept[2] -2.98 1.13 -5.26 -0.86 1.00 5784
## Intercept[3] 0.06 1.08 -2.10 2.19 1.00 5490
## Intercept[4] 0.91 1.10 -1.25 3.09 1.00 5568
## Intercept[5] 1.93 1.20 -0.35 4.35 1.00 6191
## var_names_copied_good.ratio 0.29 0.67 -1.05 1.58 1.00 5988
## var_names_new_good.ratio -0.21 0.72 -1.65 1.20 1.00 6406
## reused_logic_validationfalse -1.14 0.70 -2.52 0.21 1.00 6236
## equals.existsFALSE -0.01 0.57 -1.14 1.12 1.00 7118
## sonarqube_issues.s 0.33 0.33 -0.35 0.95 1.00 6210
## documentationIncorrect -0.22 0.73 -1.61 1.24 1.00 6203
## documentationNone -0.36 0.67 -1.72 0.94 1.00 6927
## moeducation_level -0.61 0.23 -1.08 -0.15 1.00 4778
## Tail_ESS
## Intercept[1] 3079
## Intercept[2] 3107
## Intercept[3] 3237
## Intercept[4] 3466
## Intercept[5] 3596
## var_names_copied_good.ratio 3520
## var_names_new_good.ratio 3343
## reused_logic_validationfalse 2763
## equals.existsFALSE 3215
## sonarqube_issues.s 3286
## documentationIncorrect 2745
## documentationNone 2790
## moeducation_level 3363
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1] 0.21 0.12 0.03 0.49 1.00 6745
## moeducation_level1[2] 0.29 0.15 0.06 0.61 1.00 7330
## moeducation_level1[3] 0.24 0.13 0.04 0.52 1.00 6463
## moeducation_level1[4] 0.26 0.13 0.05 0.55 1.00 6530
## Tail_ESS
## moeducation_level1[1] 2187
## moeducation_level1[2] 2916
## moeducation_level1[3] 2551
## moeducation_level1[4] 3147
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session (participant) varying-intercept estimates.
ranef(own_quality1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.05404263 0.4378112 -0.8401622 1.1047428
## 6033d90a5af2c702367b3a96 -0.06002463 0.4309735 -1.1000195 0.7713968
## 6034fc165af2c702367b3a98 0.17193452 0.5124783 -0.6343972 1.5305528
## 603500725af2c702367b3a99 0.06373767 0.4634091 -0.8344410 1.1645613
## 603f97625af2c702367b3a9d 0.14187385 0.4902820 -0.6580527 1.4093610
## 603fd5d95af2c702367b3a9e 0.07092876 0.4426086 -0.8292865 1.1288403
## 60409b7b5af2c702367b3a9f -0.15846507 0.4836514 -1.4506767 0.6647901
## 604b82b5a7718fbed181b336 -0.01160294 0.4636159 -1.1102332 1.0185163
## 6050c1bf856f36729d2e5218 -0.18019263 0.4873838 -1.4618342 0.5904645
## 6050e1e7856f36729d2e5219 0.13133339 0.4920097 -0.6834389 1.4209290
## 6055fdc6856f36729d2e521b -0.06227776 0.4531277 -1.1477770 0.8548694
## 60589862856f36729d2e521f 0.02721519 0.5009140 -0.9908834 1.1614400
## 605afa3a856f36729d2e5222 0.14560810 0.4915207 -0.6716086 1.4682195
## 605c8bc6856f36729d2e5223 0.19361782 0.4892051 -0.5516536 1.4914800
## 605f3f2d856f36729d2e5224 -0.14513370 0.5020033 -1.3409148 0.6613088
## 605f46c3856f36729d2e5225 -0.15417361 0.4885515 -1.4180525 0.6580545
## 60605337856f36729d2e5226 0.04309800 0.4527299 -0.8896066 1.0979900
## 60609ae6856f36729d2e5228 -0.07306372 0.4237647 -1.1367607 0.7281057
## 6061ce91856f36729d2e522e 0.11040523 0.4627137 -0.7124353 1.2930425
## 6061f106856f36729d2e5231 -0.03841978 0.4580837 -1.1343695 0.8870261
## 6068ea9f856f36729d2e523e 0.01222473 0.4514330 -0.9414272 1.0584843
## 6075ab05856f36729d2e5247 -0.18685381 0.5141471 -1.5464165 0.5980110
# MCMC trace/density diagnostics and posterior predictive check.
plot(own_quality1, ask = FALSE)
pp_check(own_quality1, nsamples = 200, type = "bars")
We select the best performing model with two variables.
# Candidate with two extra predictors: monotonic education level plus
# education field (categorical).
own_quality2 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session) +
mo(education_level) +
education_field",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd"),
# Dirichlet prior over the simplex of monotonic increments.
prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
# Cached fit; refit only when formula/priors/data change.
file = "fits/own_quality2",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality2)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) + education_field
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.43 0.36 0.01 1.30 1.00 2146 2280
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -5.37 1.44 -8.33 -2.67 1.00
## Intercept[2] -2.80 1.29 -5.31 -0.40 1.00
## Intercept[3] 0.29 1.25 -2.18 2.73 1.00
## Intercept[4] 1.14 1.26 -1.33 3.62 1.00
## Intercept[5] 2.16 1.33 -0.42 4.79 1.00
## var_names_copied_good.ratio 0.29 0.69 -1.07 1.64 1.00
## var_names_new_good.ratio -0.14 0.78 -1.65 1.42 1.00
## reused_logic_validationfalse -1.14 0.70 -2.52 0.20 1.00
## equals.existsFALSE 0.02 0.58 -1.12 1.15 1.00
## sonarqube_issues.s 0.31 0.34 -0.37 0.94 1.00
## documentationIncorrect -0.24 0.72 -1.65 1.17 1.00
## documentationNone -0.32 0.67 -1.60 1.00 1.00
## education_fieldInteractionDesign -0.56 0.86 -2.28 1.15 1.00
## education_fieldNone 0.06 0.85 -1.60 1.69 1.00
## education_fieldSoftwareEngineering 0.19 0.60 -0.96 1.36 1.00
## moeducation_level -0.59 0.25 -1.10 -0.12 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5199 3422
## Intercept[2] 5585 3327
## Intercept[3] 6201 3051
## Intercept[4] 6407 3028
## Intercept[5] 6544 3288
## var_names_copied_good.ratio 5793 3248
## var_names_new_good.ratio 7114 3427
## reused_logic_validationfalse 6010 2845
## equals.existsFALSE 7462 3079
## sonarqube_issues.s 6612 3107
## documentationIncorrect 6797 3258
## documentationNone 6972 2896
## education_fieldInteractionDesign 7315 2714
## education_fieldNone 6089 2955
## education_fieldSoftwareEngineering 6226 2993
## moeducation_level 3807 3189
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1] 0.22 0.12 0.04 0.50 1.00 6824
## moeducation_level1[2] 0.28 0.15 0.05 0.61 1.00 7371
## moeducation_level1[3] 0.24 0.13 0.04 0.54 1.00 7464
## moeducation_level1[4] 0.26 0.13 0.04 0.56 1.00 6972
## Tail_ESS
## moeducation_level1[1] 2784
## moeducation_level1[2] 2231
## moeducation_level1[3] 2867
## moeducation_level1[4] 2936
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session (participant) varying-intercept estimates.
ranef(own_quality2)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.06707319 0.4786510 -0.9364787 1.1649648
## 6033d90a5af2c702367b3a96 -0.06519388 0.4818979 -1.2238912 0.9026610
## 6034fc165af2c702367b3a98 0.17878985 0.5390886 -0.7015561 1.5875365
## 603500725af2c702367b3a99 0.05563192 0.4978533 -0.9889233 1.2210705
## 603f97625af2c702367b3a9d 0.16407815 0.5301942 -0.7069535 1.5510698
## 603fd5d95af2c702367b3a9e 0.07908452 0.4679229 -0.8040373 1.2303415
## 60409b7b5af2c702367b3a9f -0.19082355 0.5421040 -1.6213565 0.6600580
## 604b82b5a7718fbed181b336 -0.02133303 0.4931377 -1.1634473 1.0612432
## 6050c1bf856f36729d2e5218 -0.20811168 0.5405688 -1.6586460 0.6469005
## 6050e1e7856f36729d2e5219 0.16145570 0.5250060 -0.6797545 1.5386985
## 6055fdc6856f36729d2e521b -0.07425908 0.4938812 -1.2811685 0.8687660
## 60589862856f36729d2e521f 0.04762910 0.5333752 -1.0692178 1.2932448
## 605afa3a856f36729d2e5222 0.16273345 0.5170139 -0.7030329 1.5339918
## 605c8bc6856f36729d2e5223 0.19088312 0.5277696 -0.6427013 1.5725825
## 605f3f2d856f36729d2e5224 -0.15860124 0.5468580 -1.5236965 0.7425932
## 605f46c3856f36729d2e5225 -0.20630554 0.5529608 -1.7240290 0.6601076
## 60605337856f36729d2e5226 0.02047195 0.4980797 -1.0720127 1.0965478
## 60609ae6856f36729d2e5228 -0.07282350 0.4905711 -1.2983055 0.9280609
## 6061ce91856f36729d2e522e 0.11041103 0.5100814 -0.8357697 1.4116710
## 6061f106856f36729d2e5231 -0.05334549 0.4780675 -1.1574102 0.9178202
## 6068ea9f856f36729d2e523e 0.02118126 0.5312138 -1.1108683 1.2592568
## 6075ab05856f36729d2e5247 -0.14294706 0.5499219 -1.5518935 0.8314299
# MCMC trace/density diagnostics and posterior predictive check.
plot(own_quality2, ask = FALSE)
pp_check(own_quality2, nsamples = 200, type = "bars")
We select the best performing model with three variables.
# Candidate with three extra predictors: monotonic education level,
# education field, and workplace peer review.
own_quality3 <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session) +
mo(education_level) +
education_field +
workplace_peer_review",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd"),
# Dirichlet prior over the simplex of monotonic increments.
prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
),
family = cumulative(),
data = as.data.frame(d.both_completed),
# Cached fit; refit only when formula/priors/data change.
file = "fits/own_quality3",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality3)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session) + mo(education_level) + education_field + workplace_peer_review
## Data: as.data.frame(d.both_completed) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.43 0.35 0.02 1.30 1.00 1932 2370
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept[1] -5.44 1.45 -8.38 -2.60 1.00
## Intercept[2] -2.86 1.33 -5.49 -0.31 1.00
## Intercept[3] 0.24 1.30 -2.22 2.81 1.00
## Intercept[4] 1.10 1.32 -1.44 3.74 1.00
## Intercept[5] 2.15 1.40 -0.45 5.01 1.00
## var_names_copied_good.ratio 0.30 0.68 -1.03 1.67 1.00
## var_names_new_good.ratio -0.16 0.75 -1.62 1.31 1.00
## reused_logic_validationfalse -1.16 0.68 -2.56 0.18 1.00
## equals.existsFALSE 0.04 0.61 -1.15 1.22 1.00
## sonarqube_issues.s 0.34 0.34 -0.35 0.97 1.00
## documentationIncorrect -0.21 0.72 -1.63 1.24 1.00
## documentationNone -0.31 0.69 -1.68 1.01 1.00
## education_fieldInteractionDesign -0.55 0.81 -2.10 1.06 1.00
## education_fieldNone 0.05 0.85 -1.59 1.72 1.00
## education_fieldSoftwareEngineering 0.18 0.61 -1.04 1.38 1.00
## workplace_peer_reviewfalse -0.09 0.59 -1.22 1.04 1.00
## moeducation_level -0.61 0.26 -1.12 -0.11 1.00
## Bulk_ESS Tail_ESS
## Intercept[1] 5123 3498
## Intercept[2] 5931 3120
## Intercept[3] 6782 3334
## Intercept[4] 6843 3414
## Intercept[5] 7347 3324
## var_names_copied_good.ratio 7196 3269
## var_names_new_good.ratio 5915 3407
## reused_logic_validationfalse 5983 2933
## equals.existsFALSE 6203 2862
## sonarqube_issues.s 5741 3204
## documentationIncorrect 6023 3116
## documentationNone 6864 3218
## education_fieldInteractionDesign 8761 3533
## education_fieldNone 7733 2784
## education_fieldSoftwareEngineering 6871 3141
## workplace_peer_reviewfalse 6564 3365
## moeducation_level 4324 3653
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## moeducation_level1[1] 0.22 0.13 0.03 0.52 1.00 7946
## moeducation_level1[2] 0.28 0.15 0.05 0.60 1.00 7847
## moeducation_level1[3] 0.24 0.13 0.04 0.54 1.00 5721
## moeducation_level1[4] 0.26 0.14 0.05 0.57 1.00 7918
## Tail_ESS
## moeducation_level1[1] 2518
## moeducation_level1[2] 2878
## moeducation_level1[3] 2773
## moeducation_level1[4] 2931
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session (participant) varying-intercept estimates.
ranef(own_quality3)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 0.08048896 0.4984087 -0.9139714 1.2668988
## 6033d90a5af2c702367b3a96 -0.07733800 0.4812650 -1.2069905 0.8666339
## 6034fc165af2c702367b3a98 0.18181679 0.5474059 -0.6720627 1.6469908
## 603500725af2c702367b3a99 0.07079538 0.4863141 -0.8777513 1.2633528
## 603f97625af2c702367b3a9d 0.17635879 0.5338025 -0.6599606 1.5408450
## 603fd5d95af2c702367b3a9e 0.07942892 0.4752573 -0.8484875 1.2559530
## 60409b7b5af2c702367b3a9f -0.18746119 0.5224465 -1.6139693 0.6401973
## 604b82b5a7718fbed181b336 -0.02491298 0.5192187 -1.2126570 1.0606760
## 6050c1bf856f36729d2e5218 -0.22249704 0.5647409 -1.7505915 0.6393064
## 6050e1e7856f36729d2e5219 0.13479343 0.5306805 -0.8074007 1.4929513
## 6055fdc6856f36729d2e521b -0.08189128 0.5013831 -1.2882493 0.8901105
## 60589862856f36729d2e521f 0.04645215 0.5352902 -1.0653733 1.2539298
## 605afa3a856f36729d2e5222 0.17706275 0.5291508 -0.6584650 1.5704603
## 605c8bc6856f36729d2e5223 0.21047017 0.5379432 -0.5955060 1.6297910
## 605f3f2d856f36729d2e5224 -0.15981022 0.5192424 -1.5082258 0.7088357
## 605f46c3856f36729d2e5225 -0.20304568 0.5523530 -1.7320892 0.6405932
## 60605337856f36729d2e5226 0.03444047 0.4999494 -0.9771633 1.1630840
## 60609ae6856f36729d2e5228 -0.07183398 0.4637736 -1.1755177 0.8377028
## 6061ce91856f36729d2e522e 0.11476500 0.5053778 -0.8343662 1.3554295
## 6061f106856f36729d2e5231 -0.05490970 0.4987499 -1.2555228 0.9308630
## 6068ea9f856f36729d2e523e 0.03765412 0.4961380 -0.9625379 1.2197075
## 6075ab05856f36729d2e5247 -0.14370579 0.5603121 -1.5660615 0.8145873
# MCMC trace/density diagnostics and posterior predictive check.
plot(own_quality3, ask = FALSE)
pp_check(own_quality3, nsamples = 200, type = "bars")
All candidate models look fine, and none is significantly better than the others; we will proceed with the model containing work experience, as it would otherwise be added in the next step: own_quality0
Some participants only completed one scenario. Those have been excluded from the initial dataset to improve sampling of the models. We do, however, want to use all the data we can and will therefore try to fit the model with the complete dataset.
# Refit of the selected baseline model on the complete dataset
# (d.completed), which also includes participants who finished only one
# scenario.
own_quality0.all <- brm(
"quality_post_task ~ 1 +
var_names_copied_good.ratio +
var_names_new_good.ratio +
reused_logic_validation +
equals.exists +
sonarqube_issues.s +
documentation +
(1 | session)",
prior = c(
prior(normal(0, 2.5), class = "Intercept"),
prior(normal(0, 1), class = "b"),
prior(exponential(1), class = "sd")
),
family = cumulative(),
data = as.data.frame(d.completed),
# Cached fit; refit only when formula/priors/data change.
file = "fits/own_quality0.all",
file_refit = "on_change",
control = list(adapt_delta = 0.95),
seed = 20210421
)
summary(own_quality0.all)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: quality_post_task ~ 1 + var_names_copied_good.ratio + var_names_new_good.ratio + reused_logic_validation + equals.exists + sonarqube_issues.s + documentation + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.48 0.39 0.02 1.42 1.00 1529 2061
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept[1] -4.82 1.25 -7.36 -2.44 1.00 4248
## Intercept[2] -3.51 1.07 -5.65 -1.40 1.00 5920
## Intercept[3] -1.44 0.98 -3.33 0.47 1.00 5712
## Intercept[4] 1.43 1.00 -0.44 3.43 1.00 4911
## Intercept[5] 2.17 1.04 0.22 4.21 1.00 4946
## Intercept[6] 3.11 1.14 0.96 5.47 1.00 5471
## var_names_copied_good.ratio 0.18 0.65 -1.11 1.48 1.00 5582
## var_names_new_good.ratio -0.03 0.75 -1.46 1.47 1.00 6033
## reused_logic_validationfalse -1.30 0.65 -2.56 -0.03 1.00 5801
## equals.existsFALSE -0.19 0.55 -1.25 0.90 1.00 5652
## sonarqube_issues.s 0.08 0.31 -0.58 0.66 1.00 5720
## documentationIncorrect -0.17 0.66 -1.47 1.11 1.00 5214
## documentationNone -0.30 0.60 -1.46 0.85 1.00 4596
## Tail_ESS
## Intercept[1] 3014
## Intercept[2] 3424
## Intercept[3] 3150
## Intercept[4] 3104
## Intercept[5] 2666
## Intercept[6] 2923
## var_names_copied_good.ratio 3485
## var_names_new_good.ratio 3237
## reused_logic_validationfalse 3142
## equals.existsFALSE 2731
## sonarqube_issues.s 2884
## documentationIncorrect 3086
## documentationNone 3141
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## disc 1.00 0.00 1.00 1.00 1.00 4000 4000
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Per-session (participant) varying-intercept estimates.
ranef(own_quality0.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 -0.3207778990 0.7551793 -2.4152945 0.7359722
## 6033d69a5af2c702367b3a95 0.0303740242 0.5044283 -1.0285582 1.1863075
## 6033d90a5af2c702367b3a96 0.0336190428 0.4948060 -1.0124235 1.1771378
## 6034fc165af2c702367b3a98 0.2087600884 0.5794296 -0.7144648 1.7238312
## 603500725af2c702367b3a99 0.2076320836 0.5417118 -0.6566448 1.5825878
## 603f84f15af2c702367b3a9b -0.0039225316 0.5346988 -1.1967598 1.1579527
## 603f97625af2c702367b3a9d 0.1898467092 0.5554716 -0.7637427 1.6179878
## 603fd5d95af2c702367b3a9e 0.2181241774 0.5720424 -0.7074675 1.7204790
## 60409b7b5af2c702367b3a9f -0.1874119621 0.5481002 -1.6154958 0.7393761
## 604b82b5a7718fbed181b336 -0.0514723843 0.5085569 -1.2761702 1.0070920
## 604f1239a7718fbed181b33f 0.0020780522 0.5286543 -1.1769237 1.1665053
## 6050c1bf856f36729d2e5218 -0.2515663282 0.5756611 -1.7694040 0.6122263
## 6050e1e7856f36729d2e5219 0.2628502526 0.6390896 -0.6757058 1.9578905
## 6055fdc6856f36729d2e521b -0.0508008015 0.5068746 -1.2568197 0.9480694
## 60579f2a856f36729d2e521e -0.1998431545 0.6065939 -1.7965615 0.8143592
## 60589862856f36729d2e521f 0.0242806543 0.5642225 -1.2487275 1.2835910
## 605a30a7856f36729d2e5221 0.0002864863 0.5528690 -1.2134425 1.2041893
## 605afa3a856f36729d2e5222 0.1171291908 0.5240187 -0.8773085 1.4335923
## 605c8bc6856f36729d2e5223 0.1278346966 0.5357250 -0.8280100 1.5131830
## 605f3f2d856f36729d2e5224 -0.1917274143 0.5604554 -1.6506178 0.7260868
## 605f46c3856f36729d2e5225 -0.3238618011 0.6267011 -1.9888450 0.5089534
## 60605337856f36729d2e5226 -0.0217438347 0.5274934 -1.1948120 1.1498588
## 60609ae6856f36729d2e5228 -0.0681263423 0.5144221 -1.3049072 0.9895947
## 6061ce91856f36729d2e522e 0.2160133201 0.5511559 -0.6768732 1.6578645
## 6061f106856f36729d2e5231 0.0781269903 0.5065569 -0.9185505 1.3131540
## 60672faa856f36729d2e523c 0.0182042026 0.5434397 -1.1529450 1.2639068
## 6068ea9f856f36729d2e523e 0.1966178382 0.5569204 -0.7398937 1.6265915
## 606db69d856f36729d2e5243 0.0106091331 0.5303579 -1.1863375 1.1808125
## 6075ab05856f36729d2e5247 -0.3483866292 0.6350839 -2.0893625 0.4830702
# MCMC trace/density diagnostics and posterior predictive check.
plot(own_quality0.all, ask = FALSE)
pp_check(own_quality0.all, nsamples = 200, type = "bars")
This means that our final model, with all data points and experience predictors, is own_quality0.all
To begin interpreting the model we look at how its parameters were estimated. As our research is focused on how the outcome of the model is affected, we will mainly analyze the \(\beta\) parameters.
# Posterior densities of the population-level (beta) parameters with 95%
# credible intervals, with human-readable axis labels.
# The original added an unlabelled scale_y_discrete() immediately replaced
# by the labelled one, which only triggered a "scale already present"
# replacement message; a single labelled scale is equivalent.
mcmc_areas(own_quality0.all,
pars = c(
"b_var_names_copied_good.ratio",
"b_var_names_new_good.ratio",
"b_reused_logic_validationfalse",
"b_equals.existsFALSE",
"b_sonarqube_issues.s",
"b_documentationIncorrect",
"b_documentationNone"
),
prob = 0.95) +
scale_y_discrete(labels=c(
"Ratio of good copied var names",
"Ratio of good new var names",
"Duplicated validation logic",
"Missing equals implementation",
"Amount of sonarqube issues",
"Incorrect documentation",
"No documentation"
)) +
ggtitle("Beta parameters densities in self assessed quality model", subtitle = "Shaded region marks 95% of the density. Line marks the median")
As we have a lot of effects playing small roles, we will simulate two scenarios: one where the developer, according to us, does well, and one where the developer does not do so well, and see if the participants' ratings agree with us.
# Convert raw years of programming experience to standard-deviation units,
# standardizing against the completed-sessions dataset (d.completed).
scale_programming_experience <- function(x) {
  exp_years <- d.completed$work_experience_programming
  (x - mean(exp_years)) / sd(exp_years)
}
# Inverse of scale_programming_experience(): map a standardized value back
# to raw years of experience using the same d.completed mean and sd.
unscale_programming_experience <- function(x) {
  exp_years <- d.completed$work_experience_programming
  x * sd(exp_years) + mean(exp_years)
}
# Two synthetic submissions for posterior prediction: row 1 is a "good"
# submission (good names, kept validation logic, equals implemented, few
# issues, correct docs), row 2 a "bad" one. session = NA leaves the
# participant unspecified.
# tibble() replaces the deprecated tibble::data_frame() alias; the
# resulting object is identical.
post_settings <- tibble(
var_names_copied_good.ratio = c(0.9, 0.5),
var_names_new_good.ratio = c(0.9, 0.5),
reused_logic_validation = c("true", "false"),
equals.exists = c("TRUE", "FALSE"),
sonarqube_issues.s = c(-1, 1),
documentation = c("Correct", "Incorrect"),
session = NA
)
# Draw posterior predictions for both synthetic submissions, reshape the
# draws to long format (one row per draw x settings row), join the
# settings back on, and keep only the predicted rating plus a Good/Bad
# label derived from the reused-validation-logic setting.
post <- posterior_predict(own_quality0.all, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
mutate(submission = revalue(reused_logic_validation, c("true" = "Good", "false" = "Bad"))) %>%
select(
estimate,
submission
)
# Map the numeric predicted categories (1-7) back to the labelled Likert
# scale, as an ordered factor. A direct mutate() of the single column
# replaces the superseded mutate_at() verb.
post.nice <- post %>% mutate(estimate = revalue(as.ordered(estimate), c(
"1"="Very Bad",
"2"="Bad",
"3"="Somewhat Bad",
"4"="Neutral",
"5"="Somewhat Good",
"6"="Good",
"7"="Very Good"
)))
# Mean predicted rating (on the numeric 1-7 scale) per submission group;
# used as vertical reference lines in the histogram below.
vline.data <- post.nice %>%
group_by(submission) %>%
summarize(z = mean(as.numeric(estimate)))
# NOTE(review): sprintf() has no format fields here, so this just prints a
# constant label; this model does not condition on experience, so the
# "3 years" wording may be a leftover from a sibling analysis -- confirm.
sprintf("Estimations for 3 years experience")
## [1] "Estimations for 3 years experience"
# Histogram of predicted ratings per submission quality, with the group
# mean as a vertical reference line. geom_bar() computes counts directly;
# geom_histogram(stat = "count") is deprecated usage that warns and then
# behaves identically.
post.nice %>%
ggplot() +
geom_bar(aes(x=estimate), fill= "Light Blue") +
geom_vline(aes(xintercept = z),
vline.data,
col = "Dark Blue",
lwd = 1)+
facet_grid(rows = vars(submission)) +
scale_y_continuous(limits = NULL, breaks = c(400,800,1200,1600), labels = c("10%","20%","30%","40%")) +
theme(axis.title.x=element_blank(),
axis.title.y=element_blank())
# Per-draw rating difference (bad - good). This relies on the draws for
# the two settings rows coming out of posterior_predict in the same order,
# so the filtered subsets align row-by-row.
post.diff <- post %>% filter(submission == "Bad")
post.diff$estimate = post.diff$estimate - filter(post, submission == "Good")$estimate
# Distribution of the per-draw rating difference.
# NOTE(review): quantile_lines, quantile_fun and vline_linetype are not
# ggplot2::geom_boxplot() arguments -- presumably a ggdist/ggridges-style
# geom was intended, or these arguments are silently ignored; confirm
# against the loaded packages.
post.diff %>%
ggplot(aes(x=estimate)) +
geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
labs(
title = "Submission rating diff",
subtitle = "Difference as: bad submission rating - good submission rating",
x = "Rating difference"
) +
scale_y_continuous(breaks = NULL)
We can then proceed to calculate some likelihoods:
# Count posterior draws by the sign of the rating difference and form
# their ratio. Compare the estimate column explicitly: the original summed
# a comparison over the whole data frame, which also (pointlessly)
# compared the character `submission` column against 0.
# NOTE(review): estimate = bad - good, so estimate < 0 means the bad
# submission was rated LOWER than the good one -- the variable names look
# swapped relative to their meaning; confirm against the reported
# conclusion below.
bad_rated_higher <- sum(post.diff$estimate < 0)
good_rated_higher <- sum(post.diff$estimate > 0)
x <- good_rated_higher / bad_rated_higher
x
## [1] 2.138774
Participants were 114% more likely to rate the bad submission as worse than they were to rate the good submission as worse.